Notes

Interperation

Recursion vs generational change interpretation?

  • Anything significant about this interpretation?
library(knitr)
library(tidyverse)
f_p <- 'results-100.csv'
df_results <- read.csv(file.path(getwd(), 'outputs', f_p))
df_results <- df_results %>%
  mutate(is_final_generation=ifelse(is_final_generation=="True", TRUE, FALSE))
## Warning: package 'bindrcpp' was built under R version 3.4.4
cat(paste0(length(unique(df_results$seed))), "simulations run")
## 100 simulations run

Trajectories

# Get stable trajectories
stables <- df_results %>% 
  mutate(klass=paste0('k-', k, 'c-', c, 'seed-', seed)) %>%
  filter(is_final_generation) %>%
  mutate(stable=ifelse(that_rate > 0.001 & that_rate < 0.999, TRUE, FALSE)) %>%
  filter(stable)
stables <- stables$klass

that-rate trajectories

# Plot all trajectories  
n_samples <- 100
seed_samples <- sample(df_results$seed, n_samples)
df_results %>%
  filter(seed %in% seed_samples) %>%
  mutate(klass=paste0('k-', k, 'c-', c, 'seed-', seed),
         num_generations=as.numeric(num_generations),
         stable=ifelse(klass %in% stables, 'stable-optionality', 'conventionalized')) %>%
  ggplot(aes(x=num_generations, y=that_rate, col=stable, group=klass)) +
    geom_line(alpha=0.3) +
    theme_classic() +
    theme(legend.position="none") +
    ggtitle("that-rate trajectories")

# Facet by initial B_prob, t_prob
n_samples <- 10
seed_samples <- sample(df_results$seed, n_samples)
df_results %>%
  filter(seed %in% seed_samples) %>%
  mutate(klass=paste0('k-', k, 'c-', c, 'seed-', seed),
         num_generations=as.numeric(num_generations),
         stable=ifelse(klass %in% stables, 'stable-optionality', 'conventionalized')) %>%
  ggplot(aes(x=num_generations, y=that_rate, col=stable, group=klass)) +
    geom_line(alpha=0.3) +
    theme_classic() +
    theme(legend.position="none") +
    facet_grid(round(t_prob, 3)*round(B_prob, 3)~.) +
    ggtitle(paste0(n_samples, " samples"))

r trajectories

df_results %>%
  mutate(klass=paste0('k-', k, 'c-', c, 'seed-', seed),
         num_generations=as.numeric(num_generations),
         stable=ifelse(klass %in% stables, 'stable-optionality', 'conventionalized')) %>%
  ggplot(aes(x=num_generations, y=r, col=stable, group=klass)) +
    geom_line(alpha=0.3) +
    theme_classic() +
    theme(legend.position="none") +
    ggtitle("r trajectories")

# Facet by initial B_prob, t_prob
df_results %>%
  filter(seed %in% seed_samples) %>%
  mutate(klass=paste0('k-', k, 'c-', c, 'seed-', seed),
         num_generations=as.numeric(num_generations),
         stable=ifelse(klass %in% stables, 'stable-optionality', 'conventionalized')) %>%
  ggplot(aes(x=num_generations, y=r, col=stable, group=klass)) +
    geom_line(alpha=0.3) +
    theme_classic() +
    theme(legend.position="none") +
    facet_grid(round(t_prob, 3)*round(B_prob, 3)~.)

Reproducing R. Levy plots

Preprocessing

# R. Levy's preprocessing
# dat$stable <- with(dat,thatrate > 0.001 & thatrate < 0.999)
# dat <- subset(dat, ! (k==1.0 & c==0.0))
# dat.summary <- dat %>% group_by(k,c) %>%
#   dplyr:::summarise(stable=mean(stable),r=mean(r))

# This preprocessing is only valid for stable optionality plot
df_preprocessed <- df_results %>%
  filter(is_final_generation) %>%
  mutate(stable = that_rate > 0.001 & that_rate < 0.999) %>%
  filter(k != 1.0, c != 0.0) %>%
  group_by(k, c) %>%
  summarise(stable=mean(stable), r=mean(r))

Stable optionality plot

df_preprocessed %>%
  ggplot(aes(k,c)) + 
    geom_tile(aes(fill=stable),colour="white") +
    labs(y=expression(paste("String length cost parameter ", c)), fill="stable\noptionality\nrate") +
    theme_classic() +
    scale_x_continuous(name=expression(paste("Nonuniformity penalization parameter ",k)),
                       breaks=seq(1,2,by=0.2))

Marginal frequency plot

## Distribution of marginal frequencies of optional marker t at fixed points with stable optionality
df_results %>%
  filter(is_final_generation) %>%
  mutate(stable=that_rate > 0.001 & that_rate < 0.999) %>%
  filter(stable) %>%
  ggplot(aes(x=that_rate,y=..density..)) + 
    geom_histogram(bins=42) + 
    scale_x_continuous(limits=c(-0.05,1.05)) +
    ylab("Probability density") +
    xlab(expression(paste("Marginal frequency of optional marker ",t))) +
    theme_classic()
## Warning: Removed 1 rows containing missing values (geom_bar).

Perason correlation plot

## distribution of correlations between phrase onset probability and t-rate at fixed points with stable optionality
df_results %>%
  filter(is_final_generation) %>%
  mutate(stable = that_rate > 0.001 & that_rate < 0.999) %>%
  filter(stable) %>%
  ggplot(aes(x=r, y=..density..)) + 
    geom_histogram(bins=42) + 
    scale_x_continuous(limits=c(-1.05,1.05)) +
    ylab("Probability density") +
    xlab("Pearson correlation between\nphrase onset & t probabilities") + 
    theme_classic()

Additional analyses

df_results %>% 
  filter(is_final_generation) %>% 
  mutate(stable = that_rate > 0.001 & that_rate < 0.999) %>%
  ggplot(aes(x=that_rate, y=r, col=stable)) +
    geom_point(alpha=0.3) +
    theme_classic()

Proportion stable?

df_results %>% 
  filter(is_final_generation) %>% 
  mutate(stable = that_rate > 0.001 & that_rate < 0.999) %>%
  group_by(stable) %>%
  summarise(n=n()) %>%
  ungroup %>%
  mutate(total=sum(n),
         prop=n/total) %>%
  ggplot(aes(x=stable, y=prop)) +
    geom_bar(stat='identity') +
    geom_text(aes(x=stable, y=prop, label=round(prop, 2)), nudge_y=0.025) +
    ylim(0, 1) +
    ggtitle("Proportion of simulations leading to stable optionality") +
    theme_classic()

Descriptives

df_results %>% 
  mutate(stable = that_rate > 0.001 & that_rate < 0.999) %>%
  filter(stable) %>%
  select(num_generations) %>%
  summary()
##  num_generations 
##  Min.   :  0.00  
##  1st Qu.:  1.00  
##  Median :  2.00  
##  Mean   :  5.36  
##  3rd Qu.:  6.00  
##  Max.   :100.00
df_results %>% 
  mutate(stable = that_rate > 0.001 & that_rate < 0.999) %>%
  filter(stable) %>%
  group_by(num_generations) %>%
  summarise(cnt=n()) %>%
  ggplot(aes(x=num_generations, y=cnt)) +
    geom_bar(stat='identity') +
    theme_classic()

df_results %>%
  mutate(stable = that_rate > 0.001 & that_rate < 0.999) %>%
  filter(stable) %>%
  mutate(num_generations=num_generations+1) %>%
  group_by(num_generations) %>%
  summarise(cnt=n()) %>%
  ggplot(aes(x=log(num_generations), y=log(cnt))) +
    geom_point(stat='identity', size=2.5, alpha=0.4) +
    geom_smooth(method='lm') +
    theme_classic()

Relation between t_prob and eventual stability?

df_results %>%
  filter(is_final_generation) %>%
  mutate(stable = that_rate > 0.001 & that_rate < 0.999) %>%
  filter(stable) %>%
  ggplot(aes(x=num_generations, y=t_prob, col=k, size=c)) +
    geom_point(alpha=0.3) +
    theme(legend.position = 'none') +
    theme_classic()

df_lm <- df_results %>% 
  filter(is_final_generation) %>%
  mutate(stable = that_rate > 0.001 & that_rate < 0.999)
reg <- lm(r~num_generations+k*c+B_prob+t_prob+stable, data=df_lm)
summary(reg)
## 
## Call:
## lm(formula = r ~ num_generations + k * c + B_prob + t_prob + 
##     stable, data = df_lm)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.17889 -0.21677 -0.02675  0.17935  1.56668 
## 
## Coefficients:
##                   Estimate Std. Error  t value Pr(>|t|)    
## (Intercept)      0.4942606  0.0196194   25.192   <2e-16 ***
## num_generations  0.0052020  0.0003527   14.750   <2e-16 ***
## k               -0.1551294  0.0119264  -13.007   <2e-16 ***
## c                0.1878185  0.0144019   13.041   <2e-16 ***
## B_prob          -0.2701854  0.0054637  -49.451   <2e-16 ***
## t_prob          -1.1756335  0.0059721 -196.855   <2e-16 ***
## stableTRUE      -0.2431126  0.0050611  -48.036   <2e-16 ***
## k:c             -0.0829793  0.0092468   -8.974   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.335 on 44092 degrees of freedom
## Multiple R-squared:  0.5191, Adjusted R-squared:  0.519 
## F-statistic:  6799 on 7 and 44092 DF,  p-value: < 2.2e-16